﻿Imports DRO.connections.connections_admin
Imports DRO.DB.DB
Imports DRO.UTILITIES
Imports DRO.tokenizing_utilities

''' <summary>
''' Form that, when loaded, reads every record of the "clist" list, tokenizes the
''' "inner_html" text of the records that mention "toyota", and writes one row per
''' such record into the "cars" list (price, model_year, make, model).
''' </summary>
Public Class Form_tokenize
    ' Result of the "clist" SELECT issued in Form_tokenize_Load.
    Dim ds As DataSet
    ' Record count handed back by get_dataset; a negative value is treated as failure.
    Dim num_recs As Integer

    ''' <summary>
    ''' Load handler: fetches the "clist" records into <c>ds</c> and runs
    ''' <see cref="make_tokens"/> over them.
    ''' </summary>
    ''' <param name="sender">Event source (unused).</param>
    ''' <param name="e">Event arguments (unused).</param>
    Private Sub Form_tokenize_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
        ' NOTE(review): presumably selects the connection for "clist" and empties
        ' "cars" before it is repopulated -- both helpers are defined elsewhere; confirm.
        get_connection_name("clist")
        clear("cars")

        Dim sg As New sql_generator
        sg.verb = "SELECT"
        sg.add_list("clist")

        ds = get_dataset(sg, num_recs)
        If num_recs < 0 Then
            ' Break into the debugger; the loop in make_tokens does not execute
            ' for a negative count, so continuing afterwards is harmless.
            Stop
        End If
        make_tokens()
    End Sub

    ' Tokens of the record currently being processed (set by make_tokens).
    Dim tokens() As String
    ' Lower-cased, cleaned "inner_html" text of the record currently being processed.
    Dim inner_html As String

    ''' <summary>
    ''' Walks all fetched records; for each one whose lower-cased "inner_html"
    ''' contains "toyota", strips filler characters, splits the text into tokens
    ''' and calls <see cref="classify_tokens"/>.
    ''' </summary>
    Sub make_tokens()
        Dim delims As String = " ()"

        For i As Integer = 0 To num_recs - 1
            inner_html = nz(get_field(ds, i, "inner_html")).ToLower()
            If Not inner_html.Contains("toyota") Then Continue For

            ' Remove asterisks and the 15- and 14-character dollar-sign runs,
            ' longest first so the shorter pattern cannot split the longer one.
            inner_html = inner_html.Replace("*", "")
            inner_html = inner_html.Replace("$$$$$$$$$$$$$$$", "")
            inner_html = inner_html.Replace("$$$$$$$$$$$$$$", "")
            Debug.Print("make_tokens: " & inner_html)

            tokens = get_tokens_all(inner_html, delims)
            classify_tokens()
        Next
    End Sub

    ' Values recognised in the current record; reset at the start of classify_tokens.
    Dim make As String
    Dim model As String
    Dim model_year As String
    Dim price As String

    ''' <summary>
    ''' Scans <c>tokens</c> for a price, a model year, the make and a model, adds
    ''' the first occurrence of each to an INSERT on "cars", and executes it.
    ''' </summary>
    ''' <remarks>
    ''' Tokens are matched literally against dash-delimited lists. The previous
    ''' implementation placed the token on the pattern side of <c>Like</c>, so
    ''' tokens containing <c>?</c>, <c>#</c> or <c>[</c> were interpreted as
    ''' wildcard / character-list syntax (presumably why "[" had to be skipped
    ''' explicitly).
    ''' </remarks>
    Sub classify_tokens()
        make = "unknown"
        model = "unknown"
        model_year = "unknown"
        price = "9999"

        ' Nothing to classify; iterating a Nothing array would throw.
        If tokens Is Nothing Then Return

        Dim si As New sql_generator
        si.verb = "INSERT"
        si.add_list("cars")

        ' Dash-delimited so that "-" & token & "-" only matches whole entries.
        ' NOTE(review): 2006 and 2007 are absent from this list -- confirm whether
        ' that is intentional.
        Dim years As String
        years = "-1995-1996-1997-1998-1999-2000-2001-2002-2003-2004-2005-2008-2009-2010-2011-"
        years = years & "95-96-97-98-99-"
        Dim models As String = "-camry-corolla-tercel-tundra-"

        For Each t As String In tokens
            Debug.Print("classify: " & t)
            If t Is Nothing Then Continue For

            ' "9999" / "unknown" are the not-yet-found sentinels: only the first
            ' matching token of each kind is recorded.
            If price = "9999" AndAlso t.StartsWith("$") Then
                ' NOTE(review): thousands separators are removed but the leading
                ' "$" is kept -- confirm the price column expects that.
                price = t.Replace(",", "")
                si.add_insert_value("price", price)
            End If

            If model_year = "unknown" AndAlso years.Contains("-" & t & "-") Then
                model_year = t
                si.add_insert_value("model_year", model_year)
            End If

            ' Guarded like the other fields so a record that mentions "toyota"
            ' more than once does not add the "make" value repeatedly.
            If make = "unknown" AndAlso t = "toyota" Then
                make = t
                si.add_insert_value("make", make)
            End If

            If model = "unknown" AndAlso models.Contains("-" & t & "-") Then
                model = t
                si.add_insert_value("model", model)
            End If
        Next

        si.show_statement()
        Dim k As Integer = si.update_database
        If k < 0 Then
            ' Negative result is treated as a failed insert; break into the debugger.
            Stop
        End If
    End Sub
End Class